Analyze Zika infected Glioblastoma data:

### Finding gene modules that make cells susceptible to Zika infection

title: “Finding gene modules that make cells susceptible to Zika infection”
tput:
html_document: default
thub_document: default
g_width: 12
g_height: 4

Load the required R libraries:

library(Seurat)
library(Matrix)
library(EnsDb.Hsapiens.v75)
library(rhdf5)
library(dplyr)
library(ggplot2)
#library(infercnv)

Prior to this analysis we processed the raw sequencing data with cellranger/STAR aligner and ran an algorithm called CellBender on it to remove ambient RNA.

Here I 1.) load the data 2.) change the gene identifiers from ensemble ids to symbols 3.) construct a metadata matrix (with donor information, Zika exposure etc.) 4.) put the data and metadata into a Seurat object. 5.) load mitochondrial genes from a database for later use 6.) Remove doublets, by loading the doublet scores I got from an algorithm (called scrublet) I ran on the data before.

setwd('/home/jovyan/HB_ZIK/')
# glioblastoma_counts = read.delim('../data/ZikaGlioblastomas/tic-527/study5953-tic527-star-fc-genecounts.txt', row.names = 1)
# manifest = read.delim('../HB_ZIK/5953stdy_manifest_14517_170919_HB_ZIK_046_048_050_051.csv', sep = ',', skip = 8)
# manifest = manifest[manifest$SUPPLIER.SAMPLE.NAME != "",]
# scrublet_classification = read.delim('scrublet/SmartSeq_predicted_doublets.txt')
# glioblastoma_counts = glioblastoma_counts[,scrublet_classification != 1]
# glioblastoma_metadata = data.frame(matrix('', dim(glioblastoma_counts)[2],4))
# colnames(glioblastoma_metadata) = c('SampleName', 'Technology', 'ZikaExposure', 'Patient')
# glioblastoma_metadata[,1] = unlist(lapply(colnames(glioblastoma_counts), function(x) substring(x,2)))
# glioblastoma_metadata[,2] = rep('SmartSeq', dim(glioblastoma_counts)[2])
# glioblastoma_metadata[,3] = rep('TRUE', dim(glioblastoma_counts)[2])
# glioblastoma_metadata[,4] = substring(manifest$SUPPLIER.SAMPLE.NAME[match(glioblastoma_metadata$SampleName, manifest$SANGER.SAMPLE.ID)], 3, 4)
# patients = c('42','42','43', '43', '45', '45', '46', '46') # (info from sample tracker)
# geneNames = as.matrix(read.table(paste('/home/jovyan/data/HB_ZIK/HB_ZIK/cellranger302_count_32771_5953STDY855119', as.character(1), '_GRCh38-3_0_0_premrna/filtered_feature_bc_matrix/features.tsv', sep = '')))
# 
# geneOccurence = table(geneNames[,2])
# for (gene in names(geneOccurence)){
#   if (geneOccurence[gene] > 1){
#     geneNames[which(geneNames[,2] == gene),2] = paste(geneNames[which(geneNames[,2] == gene),2], '(', geneNames[which(geneNames[,2] == gene),1], ')', sep = '')
#   }
# }
# rownames(glioblastoma_counts) = geneNames[match(rownames(glioblastoma_counts), geneNames[,1]),2]
# glioblastoma_counts = glioblastoma_counts[geneNames[,2],]
# for (i in 1:8){
#   print(i)
#   # prefiltered count_matrix:
#   data_subset <- as.matrix(Read10X_h5(filename = paste('../data/HB_ZIK/HB_ZIK/cellranger302_count_32771_5953STDY855119', as.character(i), '_GRCh38-3_0_0_premrna/output_filtered.h5', sep = ''), use.names = TRUE))
#   scrublet_classification = read.delim(paste('scrublet/5953STDY855119', as.character(i), '_predicted_doublets.txt', sep = ''))
#   print('removed doublets:')
#   print(sum(scrublet_classification))
#   data_subset = data_subset[,scrublet_classification != 1]
#   sampleNames = rownames(data_subset)
#   glioblastoma_counts = cbind(glioblastoma_counts, data_subset)
#   metadata_subset = data.frame(matrix('', dim(data_subset)[2],4))
#   colnames(metadata_subset) = c('SampleName', 'Technology', 'ZikaExposure', 'Patient')
#   metadata_subset[,1] = colnames(data_subset)
#   metadata_subset[,2] = rep('10X', dim(data_subset)[2])
#   metadata_subset[,3] = rep('FALSE', dim(data_subset)[2])
#   metadata_subset[,4] = rep(patients[i], dim(data_subset)[2])
#   glioblastoma_metadata = rbind(glioblastoma_metadata, metadata_subset)
# }
# mitogenes <- genes(EnsDb.Hsapiens.v75, filter = ~ seq_name == "MT")$gene_id
# mitogenes = geneNames[,2][match(mitogenes, geneNames[,1])]
# mitogenes = mitogenes[!is.na(mitogenes)]
# percent.mt = colSums(glioblastoma_counts[rownames(glioblastoma_counts) %in% mitogenes,])/colSums(glioblastoma_counts)
# Glioblastoma <- CreateSeuratObject(glioblastoma_counts, project = 'HB_ZIK', min.cells = 0, min.features = 0)
# Glioblastoma$SampleName = colnames(glioblastoma_counts)
# Glioblastoma$Technology = glioblastoma_metadata$Technology
# Glioblastoma$ZikaExposure = glioblastoma_metadata$ZikaExposure
# Glioblastoma$Patient = glioblastoma_metadata$Patient
# Glioblastoma$percent.mt = percent.mt
# saveRDS(Glioblastoma, file = "../data/ZikaGlioblastomas/zikaGlioblastomas_SeuratObject.rds")
Glioblastoma = readRDS("../data/ZikaGlioblastomas/zikaGlioblastomas_SeuratObject.rds")
Glioblastoma = Glioblastoma[,!is.na(Glioblastoma$Patient)]
Glioblastoma = Glioblastoma[,Glioblastoma$Patient == '46']

The QC plots using number of detected genes, number of counts and percent of counts coming from mitochondrial genes (as a proxy for stress), show a couple of outlier cells, which I remove:

Glioblastoma.list <- SplitObject(Glioblastoma, split.by = 'Technology')
i = 1
VlnPlot(Glioblastoma.list[[i]], features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = 'Patient')

plot1 <- FeatureScatter(Glioblastoma.list[[i]], feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(Glioblastoma.list[[i]], feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))

Glioblastoma.list[[i]] <- subset(Glioblastoma.list[[i]], subset = nFeature_RNA > 2500 & nFeature_RNA < 12000 & nCount_RNA < 2*10^6 & percent.mt < 0.1)
VlnPlot(Glioblastoma.list[[i]], features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = 'Patient')

i = 2
VlnPlot(Glioblastoma.list[[i]], features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = 'Patient')

plot1 <- FeatureScatter(Glioblastoma.list[[i]], feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(Glioblastoma.list[[i]], feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))

Glioblastoma.list[[i]] <- subset(Glioblastoma.list[[i]], subset = nFeature_RNA > 500 & nFeature_RNA < 5000 & nCount_RNA > 0 & nCount_RNA < 4*10^5 & percent.mt < 0.1)
VlnPlot(Glioblastoma.list[[i]], features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = 'Patient')

The following normalizes, scales and select 2000 particularly variable genes for the 10x data:

Glioblastoma.10x = SplitObject(Glioblastoma.list[[2]], split.by = 'Patient')
for (i in 1){
Glioblastoma.10x[[i]] <- NormalizeData(Glioblastoma.10x[[i]], normalization.method = "LogNormalize", scale.factor = 10000)
Glioblastoma.10x[[i]] <- FindVariableFeatures(Glioblastoma.10x[[i]], selection.method = "vst", nfeatures = 2000)
top25 <- head(VariableFeatures(Glioblastoma.10x[[i]]), 25)
plot1 <- VariableFeaturePlot(Glioblastoma.10x[[i]])
plot2 <- LabelPoints(plot = plot1, points = top25, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
all.genes <- rownames(Glioblastoma.10x[[i]])
Glioblastoma.10x[[i]] <- ScaleData(Glioblastoma.10x[[i]], features = all.genes)
}

The following normalizes, scales and select 2000 particularly variable genes for the SmartSeq data:

Glioblastoma.SmartSeq = SplitObject(Glioblastoma.list[[1]], split.by = 'Patient')
rm(Glioblastoma)
rm(Glioblastoma.list)
for (i in 1){
Glioblastoma.SmartSeq[[i]] <- NormalizeData(Glioblastoma.SmartSeq[[i]], normalization.method = "LogNormalize", scale.factor = 10000)
Glioblastoma.SmartSeq[[i]] <- FindVariableFeatures(Glioblastoma.SmartSeq[[i]], selection.method = "vst", nfeatures = 2000)
top25 <- head(VariableFeatures(Glioblastoma.SmartSeq[[i]]), 25)
plot1 <- VariableFeaturePlot(Glioblastoma.SmartSeq[[i]])
plot2 <- LabelPoints(plot = plot1, points = top25, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
all.genes <- rownames(Glioblastoma.SmartSeq[[i]])
Glioblastoma.SmartSeq[[i]] <- ScaleData(Glioblastoma.SmartSeq[[i]], features = all.genes)
}

Calculate principal components:

for (i in 1){
Glioblastoma.10x[[i]] <- RunPCA(Glioblastoma.10x[[i]], features = VariableFeatures(object = Glioblastoma.10x[[i]]))
print(Glioblastoma.10x[[i]][["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(Glioblastoma.10x[[i]], dims = 1:2, reduction = "pca")
DimPlot(Glioblastoma.10x[[i]], reduction = "pca")
DimHeatmap(Glioblastoma.10x[[i]], dims = 1, cells = round(dim(Glioblastoma.10x[[i]])[2])/2, balanced = TRUE)
DimHeatmap(Glioblastoma.10x[[i]], dims = 1:15, cells = round(dim(Glioblastoma.10x[[i]])[2])/2, balanced = TRUE)
}
## PC_ 1 
## Positive:  ST6GALNAC3, MAN2A1, NEAT1, SPP1, PDK4 
## Negative:  SOX5, PTPRZ1, GPM6A, SNTG1, NRXN1 
## PC_ 2 
## Positive:  DNAJC6, RNF220, PEX5L, TMTC2, SPOCK3 
## Negative:  CHST11, CELF2, NHSL1, SAT1, LRMDA 
## PC_ 3 
## Positive:  IGFBP7, COL4A1, COL8A1, PRKG1, COL4A2 
## Negative:  LINC01322, GLCCI1, ADAMTSL1, FAM110B, SLC8A3 
## PC_ 4 
## Positive:  DPP10, CHL1, CADPS, TNC, SHISA9 
## Negative:  EGFL7, MYO1B, ABCB1, ADGRL4, VWF 
## PC_ 5 
## Positive:  DCLK1, ADGRV1, ROBO2, WDR49, PDZRN3 
## Negative:  ADARB2, SORCS1, HS6ST3, TMEM132D, RIMS2

Based on the JackStraw procedure I select 14 PCs for further analysis in both cases:

for (i in 1){
Glioblastoma.10x[[i]] <- JackStraw(Glioblastoma.10x[[i]], num.replicate = 100)
Glioblastoma.10x[[i]] <- ScoreJackStraw(Glioblastoma.10x[[i]], dims = 1:20)
JackStrawPlot(Glioblastoma.10x[[i]], dims = 1:20)
ElbowPlot(Glioblastoma.10x[[i]], ndims = 40)
}

This is the clustering step:

n_dimensions = 14
for (i in 1){
Glioblastoma.10x[[i]] <- FindNeighbors(Glioblastoma.10x[[i]], dims = 1:n_dimensions)
Glioblastoma.10x[[i]] <- FindClusters(Glioblastoma.10x[[i]], resolution = 0.5)
head(Idents(Glioblastoma.10x[[i]]), 5)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10949
## Number of edges: 367618
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9091
## Number of communities: 13
## Elapsed time: 1 seconds

Clusters roughly agree with visual seperation on a UMAP plot:

for (i in 1){
Glioblastoma.10x[[i]] <- RunUMAP(Glioblastoma.10x[[i]], dims = 1:n_dimensions)
print(DimPlot(Glioblastoma.10x[[i]], reduction = "umap", label = TRUE))
}

Find and visualize top cluster markers:

i = 1
markers <- FindAllMarkers(Glioblastoma.10x[[i]], only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top = markers %>% group_by(cluster) %>% top_n(n = 1, wt = avg_logFC)
pdf('figures/FeaturePlotUnbiasedClusterMarkers_10x_Patient46.pdf', width = 20, height = 10)
print(FeaturePlot(Glioblastoma.10x[[i]], features = top$gene))
dev.off()
## png 
##   2
print(FeaturePlot(Glioblastoma.10x[[i]], features = top$gene))

top10<- markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
pdf('figures/HeatmapUnbiasedClusterMarkers_10x_Patient46.pdf', width = 20, height = 10)
print(DoHeatmap(Glioblastoma.10x[[i]], features = top10$gene, cells = 1:dim(Glioblastoma.10x[[i]])[2] + NoLegend()) + 
    theme(text = element_text(size = 5)))
dev.off()
## png 
##   2
print(DoHeatmap(Glioblastoma.10x[[i]], features = top10$gene, cells = 1:dim(Glioblastoma.10x[[i]])[2] + NoLegend()) + 
    theme(text = element_text(size = 5)))

Vizualize a priori chosen markers of tumour subtypes and brain cell types:

options(stringsAsFactors = FALSE)
modules = read.delim('/home/jovyan/HB_ZIK/1-s2.0-S0092867419306877-mmc2.csv', skip = 4, sep = ',')

cell.markers = c(
  'Neurons' =  'RBFOX3', # NeuN
  'Immature Neurons' = 'DCX',
  'NSC.1' = 'EMX2', # Apparently also in astrocytes
  'NSC.2' = 'PAX6',
  'Activated neurons' = 'FOS', 

  'NSC and Astrocytes' = 'GFAP',

  'VGLUT1 excitatory' = 'SLC17A7',
  'VGLUT2 excitatory' = 'SLC17A6',

  'Layer I inhibitory' = 'RELN',
  'Layer II excitatory' = 'LAMP5',
  'Layer II/III-IV excitatory.1' = 'CUX1',
  'Layer II/III-IV excitatory.2' = 'CUX2',
  'Layer IV excitatory' = 'RORB',
  'Layer Va excitatory' = 'PCP4',
  'Layer Vb excitatory' = 'HTR2C',
  'Layer VIa excitatory' = 'OPRK1',
  'Layer VIb excitatory' = 'NR4A2',

  'HPA Upper layer' = 'TPPP3',
  'HPA Mid layer' = 'NECAB1',
  'HPA Lower layer' = 'PCP4',

  'GABAergic' = 'GAD1',
  'PVALB interneuron' = 'PVALB',
  'SST interneuron' = 'SST',
  '5HT3aR interneuron' = 'HTR3A',
  'VIP interneuron' = 'VIP', #Subset of 5HTraR

  'Cholinergic' = 'ACHE',
  'Dopaminergic' = 'TH',
  'Serotinergic' = 'TPH1',

  'Astrocytes' = 'AQP4',

  'Oligodendrocytes.1' = 'PLP1',
  'Oligodendrocytes.1' = 'MOG',

  'OPC' = 'PDGFRA',

  'Endothelial cells' = 'APOLD1',

  'Microglia.1' = 'CCL4',
  'Microglia.2' = 'CCL3',
  'Microglia.3' = 'TMEM119',

  'Microglia and astrocytes' = 'CX3CR1',

  'pre-Bötzinger interneurons' = 'TACR1',
  'T cells' = 'CD3G'
  )

for (i in 1){

Glioblastoma.10x[[i]]$MESlike = colSums(Glioblastoma.10x[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.10x[[i]]@assays$RNA@scale.data) %in% c(modules['MES2'][modules['MES2'] != ''], modules['MES1'][modules['MES1'] != '']),])

Glioblastoma.10x[[i]]$AClike = colSums(Glioblastoma.10x[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.10x[[i]]@assays$RNA@scale.data) %in% modules['AC'][modules['AC'] != ''],])
Glioblastoma.10x[[i]]$OPClike = colSums(Glioblastoma.10x[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.10x[[i]]@assays$RNA@scale.data) %in% modules['OPC'][modules['OPC'] != ''],])
Glioblastoma.10x[[i]]$NPClike = colSums(Glioblastoma.10x[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.10x[[i]]@assays$RNA@scale.data) %in% c(modules['NPC2'][modules['NPC2'] != ''], modules['NPC1'][modules['NPC1'] != '']),])

features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers)) 
count = 0
for (f in list(1:10, 11:20,21:30,31:43)){
  count = count + 1
  pdf(paste('figures/KnownMarkersFeaturePlot', as.character(count), '10x_Patient46.pdf', sep = ''), width = 20, height = 10)
  print(FeaturePlot(Glioblastoma.10x[[i]], features = features[f]))
  dev.off()
  print(FeaturePlot(Glioblastoma.10x[[i]], features = features[f]))
}
}

Visualize a priori chosen markers in a heatmap:

pdf('figures/KnownMarkersHeatmap_10x_Patient46.pdf', width = 20, height = 10)
print(DoHeatmap(Glioblastoma.10x[[1]], features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))) + NoLegend())
dev.off()
## png 
##   2
DoHeatmap(Glioblastoma.10x[[1]], features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))) + NoLegend()

Assign identity to cluster based on marker expression:

new.cluster.ids <- c("0: Oligodendrocytes1",
                     "1: Oligodendrocytes2",
                     "2: Microglia",
                     "3: OPClike1",
                     "4: OPClike2",
                     "5: Microglia",
                     "6: Oligodendrocytes3",
                     "7: AClike",
                     "8: Unclassified", 
                     "9: OPClike3",
                     "10: Endothelial Cells",
                     "11: MESlike",
                     "12: NPClike")
names(new.cluster.ids) <- levels(Glioblastoma.10x[[1]])
Glioblastoma.10x[[1]] <- RenameIdents(Glioblastoma.10x[[1]], new.cluster.ids)
pdf('figures/UMAPwithClusterLabel_10x_Patient46.pdf', width = 20, height = 10)
print(DimPlot(Glioblastoma.10x[[1]], reduction = "umap", label = TRUE, pt.size = 0.5, label.size = 8) + NoLegend())
dev.off()
## png 
##   2
DimPlot(Glioblastoma.10x[[1]], reduction = "umap", label = TRUE, pt.size = 0.5, label.size = 8) + NoLegend()

Integrate with smartSeq data:

reference.list <- list(Glioblastoma.10x[[1]], Glioblastoma.SmartSeq[[1]])
glioblastoma.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:n_dimensions)
glioblastoma.integrated <- IntegrateData(anchorset = glioblastoma.anchors, dims = 1:n_dimensions)

Visualize integrated data:

library(cowplot)
library(patchwork)
# switch to integrated assay. The variable features of this assay are automatically
# set during IntegrateData
DefaultAssay(glioblastoma.integrated) <- "integrated"

# Run the standard workflow for visualization and clustering
glioblastoma.integrated <- ScaleData(glioblastoma.integrated, verbose = FALSE)
glioblastoma.integrated <- RunPCA(glioblastoma.integrated, npcs = n_dimensions, verbose = FALSE)
glioblastoma.integrated <- RunUMAP(glioblastoma.integrated, reduction = "pca", dims = 1:n_dimensions)
p1 <- DimPlot(glioblastoma.integrated, reduction = "umap", group.by = "Technology")
p2 <- DimPlot(glioblastoma.integrated, reduction = "umap", label = TRUE, 
    repel = TRUE) + NoLegend()
pdf('figures/UMAPintegrated_Patient46.pdf', width = 20, height = 10)
print(p1 + p2)
dev.off()
## png 
##   2
p1+p2

Now classify cells and make plot of cell proportions:

# Do not include unclassified cells in reference:

Reference = Glioblastoma.10x[[1]][,Idents(Glioblastoma.10x[[1]]) != '8: Unclassified']

glioblastoma.anchors <- FindTransferAnchors(reference = Reference, query = Glioblastoma.SmartSeq[[1]], dims = 1:n_dimensions)
predictions <- TransferData(anchorset = glioblastoma.anchors, refdata = Idents(Reference), dims = 1:n_dimensions)
Glioblastoma.SmartSeq[[1]] <- AddMetaData(Glioblastoma.SmartSeq[[1]], metadata = predictions)

Also make plots of integrations scores:

# Do not include unclassified cells in reference:

hist(unname(Glioblastoma.SmartSeq[[1]]$prediction.score.max[Glioblastoma.SmartSeq[[1]]$predicted.id == '3: OPClike1']),xlim = c(0,1), xlab = 'Classification Score', main = 'Histogram of OPClike1 Classification Scores')

hist(unname(Glioblastoma.SmartSeq[[1]]$prediction.score.max[Glioblastoma.SmartSeq[[1]]$predicted.id == '4: OPClike2']),xlim = c(0,1), xlab = 'Classification Score', main = 'Histogram of OPClike2 Classification Scores')

hist(unname(Glioblastoma.SmartSeq[[1]]$prediction.score.max[Glioblastoma.SmartSeq[[1]]$predicted.id == '5: Microglia']),xlim = c(0,1), xlab = 'Classification Score', main = 'Histogram of Microglia Classification Scores')

Finally visualize cell proportions in 10x and SmartSeq data:

# Do not include unclassified cells in reference:

tab = c(table(Idents(Reference)), table(Glioblastoma.SmartSeq[[1]]$predicted.id))

# create a dataset
tech <- c(rep("10X" , 12) , rep("SmartSeq" , 3))
celltype <- names(tab)
value <- unname(tab)
data <- data.frame(tech,celltype,value)
 
# Stacked + percent
pdf('figures/CellProportions_Patient46.pdf', width = 20, height = 10)
ggplot(data, aes(fill=celltype, y=value, x=tech)) + geom_bar(position="fill", stat="identity")
dev.off()
## png 
##   2
ggplot(data, aes(fill=celltype, y=value, x=tech)) + geom_bar(position="fill", stat="identity")

To be sure we also cluster the SmartSeq data on its own:

Calculate principal components:

for (i in 1){
Glioblastoma.SmartSeq[[i]] <- RunPCA(Glioblastoma.SmartSeq[[i]], features = VariableFeatures(object = Glioblastoma.SmartSeq[[i]]))
print(Glioblastoma.SmartSeq[[i]][["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(Glioblastoma.SmartSeq[[i]], dims = 1:2, reduction = "pca")
DimPlot(Glioblastoma.SmartSeq[[i]], reduction = "pca")
DimHeatmap(Glioblastoma.SmartSeq[[i]], dims = 1, cells = round(dim(Glioblastoma.SmartSeq[[i]])[2])/2, balanced = TRUE)
DimHeatmap(Glioblastoma.SmartSeq[[i]], dims = 1:15, cells = round(dim(Glioblastoma.SmartSeq[[i]])[2])/2, balanced = TRUE)
}
## PC_ 1 
## Positive:  C1QB, TYROBP, C1QC, CAPG, AIF1 
## Negative:  KCNQ5, DMD, NLGN1, CREB5, MLLT3 
## PC_ 2 
## Positive:  PARP14, TNFAIP2, NFKBIA, LYN, ZC3HAV1 
## Negative:  FDFT1, PDIA6, ACAT2, CLU, PON2 
## PC_ 3 
## Positive:  ASPM, NDC80, UBE2C, KIF23, TTK 
## Negative:  HIST1H4H, LINC01170, TOX, SCG2, FOXN3 
## PC_ 4 
## Positive:  IFIH1, PLA2G4A, PPM1K, GBP4, ISG20 
## Negative:  PSMC3IP, LINC01170, ASPM, THRB, GLRA2 
## PC_ 5 
## Positive:  LAYN, ANXA1, LCTL, CD44, TCIM 
## Negative:  ONECUT2, DPP6, RIT2, AC004852.2, NRXN1

This is the clustering step:

n_dimensions = 14
for (i in 1){
Glioblastoma.SmartSeq[[i]] <- FindNeighbors(Glioblastoma.SmartSeq[[i]], dims = 1:n_dimensions)
Glioblastoma.SmartSeq[[i]] <- FindClusters(Glioblastoma.SmartSeq[[i]], resolution = 0.5)
head(Idents(Glioblastoma.SmartSeq[[i]]), 5)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 284
## Number of edges: 8286
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7954
## Number of communities: 4
## Elapsed time: 0 seconds

Clusters roughly agree with visual seperation on a UMAP plot:

for (i in 1){
Glioblastoma.SmartSeq[[i]] <- RunUMAP(Glioblastoma.SmartSeq[[i]], dims = 1:n_dimensions)
print(DimPlot(Glioblastoma.SmartSeq[[i]], reduction = "umap", label = TRUE))
}

Find and visualize top cluster markers:

i = 1
markers_S <- FindAllMarkers(Glioblastoma.SmartSeq[[i]], only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top = markers_S %>% group_by(cluster) %>% top_n(n = 1, wt = avg_logFC)
pdf('figures/FeaturePlotUnbiasedClusterMarkers_SmartSeq_Patient46.pdf', width = 20, height = 10)
print(FeaturePlot(Glioblastoma.SmartSeq[[i]], features = top$gene))
dev.off()
## png 
##   2
print(FeaturePlot(Glioblastoma.SmartSeq[[i]], features = top$gene))

top10<- markers_S %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
pdf('figures/HeatmapUnbiasedClusterMarkers__SmartSeq_Patient46.pdf', width = 20, height = 10)
print(DoHeatmap(Glioblastoma.SmartSeq[[i]], features = top10$gene, cells = 1:dim(Glioblastoma.SmartSeq[[i]])[2] + NoLegend()) + 
    theme(text = element_text(size = 5)))
dev.off()
## png 
##   2
print(DoHeatmap(Glioblastoma.SmartSeq[[i]], features = top10$gene, cells = 1:dim(Glioblastoma.SmartSeq[[i]])[2] + NoLegend()) + 
    theme(text = element_text(size = 5)))

Vizualize a priori chosen markers of tumour subtypes:

for (i in 1){

Glioblastoma.SmartSeq[[i]]$MESlike = colSums(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data) %in% c(modules['MES2'][modules['MES2'] != ''], modules['MES1'][modules['MES1'] != '']),])

Glioblastoma.SmartSeq[[i]]$AClike = colSums(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data) %in% modules['AC'][modules['AC'] != ''],])
Glioblastoma.SmartSeq[[i]]$OPClike = colSums(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data) %in% modules['OPC'][modules['OPC'] != ''],])
Glioblastoma.SmartSeq[[i]]$NPClike = colSums(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data) %in% c(modules['NPC2'][modules['NPC2'] != ''], modules['NPC1'][modules['NPC1'] != '']),])

features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers)) 
count = 0
for (f in list(1:10, 11:20,21:30,31:43)){
  count = count + 1
  pdf(paste('figures/KnownMarkersFeaturePlot', as.character(count), '_SmartSeq_Patient46.pdf', sep = ''), width = 20, height = 10)
  print(FeaturePlot(Glioblastoma.SmartSeq[[i]], features = features[f]))
  dev.off()
  print(FeaturePlot(Glioblastoma.SmartSeq[[i]], features = features[f]))
}
}

Visualize a priori chosen markers in a heatmap:

pdf('figures/KnownMarkersHeatmapSmartSeq_Patient46.pdf', width = 20, height = 10)
print(DoHeatmap(Glioblastoma.SmartSeq[[1]], features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))) + NoLegend())
dev.off()
## png 
##   2
DoHeatmap(Glioblastoma.SmartSeq[[1]], features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))) + NoLegend()